import jieba
import numpy as np
import heapq
 
# Scratch demo of heapq.nlargest: pair every value with its position, then
# keep the five pairs with the largest value (largest first) and print them.
testList = [1, 2, 3, 4, 5]
tmp = enumerate(testList)
large5 = heapq.nlargest(5, tmp, key=lambda pair: pair[1])
print(large5)

class keywordExtractor():
    """Select the highest-IDF words of a query as its keywords.

    The IDF table is read from a text file with one ``word value`` pair per
    line, separated by a single space. Words that are missing from the table
    are scored with the median of all values read from the file.
    """

    def __init__(self, idf_path="./data/idf.txt", tokenizer=None):
        """Load the IDF table.

        Args:
            idf_path: Path of the IDF file. Defaults to the location the
                class has always read from.
            tokenizer: Optional callable taking the query string and
                returning an iterable of words. When ``None``, ``jieba.cut``
                is used at prediction time.

        Raises:
            OSError: If the IDF file cannot be opened.
            ValueError: If the second field of a two-field line is not a
                valid float.
        """
        idf_dict = {}
        data_list = []
        # Explicit encoding so decoding does not depend on the platform
        # locale. NOTE(review): assumes the IDF file is UTF-8 -- confirm.
        with open(idf_path, encoding="utf-8") as f:
            for line in f:
                ll = line.strip().split(" ")
                if len(ll) != 2:
                    # Skip blank or malformed lines.
                    continue
                word, value = ll[0], float(ll[1])
                # The first occurrence of a word wins; every value (including
                # those of repeated words) still contributes to the median.
                idf_dict.setdefault(word, value)
                data_list.append(value)
        self.__idf_dict = idf_dict
        # np.median of an empty list is NaN, which would make every ranking
        # comparison meaningless; fall back to a neutral 0.0 instead.
        self.median = np.median(data_list) if data_list else 0.0
        self._tokenizer = tokenizer

    def get_idf(self, word):
        """Return the IDF of ``word``, or the table median if it is unknown."""
        return self.__idf_dict.get(word, self.median)

    def predict(self, query, top_n=1):
        """Return up to ``top_n`` words of ``query`` with the highest IDF.

        Args:
            query: Text to extract keywords from.
            top_n: Number of keywords to return.

        Returns:
            A list of words ordered from highest to lowest IDF; words with
            equal IDF keep their order of appearance in the query. A query of
            at most two characters is returned unchanged as a one-element
            list, and a query with fewer than ``top_n`` words yields all of
            its words in their original order.
        """
        if len(query) <= 2:
            return [query]

        tokenize = self._tokenizer if self._tokenizer is not None else jieba.cut
        word_list = list(tokenize(query))
        # The emptiness check keeps max() below from seeing an empty list.
        if not word_list or len(word_list) < top_n:
            return word_list

        idf_list = [self.get_idf(word) for word in word_list]

        # Normalize by the largest IDF. The maximum is computed once, and the
        # division is skipped when it is not positive so that it can neither
        # divide by zero nor invert the ordering.
        max_idf = max(idf_list)
        if max_idf > 0:
            weight_list = [idf / max_idf for idf in idf_list]
        else:
            weight_list = idf_list

        # Rank (index, weight) pairs by weight -- not by index -- and keep
        # the indices of the top_n heaviest words.
        ranked = heapq.nlargest(
            top_n, enumerate(weight_list), key=lambda pair: pair[1]
        )
        return [word_list[idx] for idx, _ in ranked]
    
if __name__ == "__main__":
    # Smoke run: extract the top keyword of a sample query and print it.
    extractor = keywordExtractor()
    keywords = extractor.predict("今天天气怎么样")
    print(keywords)